In [1]:
# Windows-only hack: add graphviz to the PATH
import os
for path in os.environ['PATH'].split(os.pathsep):
    if path.endswith("Library\\bin"):
        os.environ['PATH']+=os.pathsep+os.path.join(path, 'graphviz')

In [2]:
from PIL import Image
import numpy as np

In [3]:
import gzip
import pickle
with gzip.open("../Week02/mnist.pkl.gz", 'rb') as f:
    train_set, validation_set, test_set = pickle.load(f, encoding='latin1')

In [4]:
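# each set is an (images, labels) pair: rows of 784 floats in [0, 1] plus integer labels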
train_X, train_y = train_set
validation_X, validation_y = validation_set
test_X, test_y = test_set

In [5]:
from IPython.display import display
def showX(X):
    int_X = (X*255).clip(0,255).astype('uint8')
    # N*784 -> N*28*28 -> 28*N*28 -> 28 * 28N
    int_X_reshape = int_X.reshape(-1,28,28).swapaxes(0,1).reshape(28,-1)
    display(Image.fromarray(int_X_reshape))
# training data: the first 20 images in X
showX(train_X[:20])
print(train_y)


[5 0 4 ..., 8 4 8]

Q: take a look at the MNIST data.

Getting started with TensorFlow


In [6]:
import tensorflow as tf
from tfdot import tfdot

Softmax regression

Basically, the probabilities are computed from the proportions of $e^{Wx+b}$.

Here $x$ is a length-784 vector (the image), $W$ is a 10x784 matrix, and we add a length-10 vector $b$. The ten resulting values, normalized to proportions, are taken as our estimated probabilities.
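
As a quick numeric check, here is the same computation in plain NumPy (a minimal sketch; `z` stands for the ten scores $Wx+b$, shortened here to three made-up values):

z = np.array([2.0, 1.0, 0.1])        # example scores Wx + b
p = np.exp(z) / np.exp(z).sum()      # proportions of e^z
print(p)                             # ≈ [0.659, 0.242, 0.099]; sums to 1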


In [7]:
# input placeholder
X = tf.placeholder(tf.float32, shape=[None, 784], name="X")
# weight parameters; for computational convenience and by convention
# (row- vs. column-vector differences), the matrix multiplication runs
# in the opposite order from the explanation above
W = tf.Variable(tf.zeros([784, 10]), name='W')
b = tf.Variable(tf.zeros([10]), name='b') # can be viewed as a row vector here

tfdot()


Out[7]:
[tfdot graph: placeholder X (?, 784) and variables W (784, 10) and b (10,) with their zero initializers]

In [8]:
# the formula we compute
Y = tf.exp(tf.matmul(X, W) + b, name="Y")
# normalize Y into proportions, matching the formula above
Y_softmax = tf.div(Y, tf.reduce_sum(Y, axis=1, keep_dims=True), name="Y_softmax")
# or, equivalently, apply the softmax to the logits directly
#Y_softmax = tf.nn.softmax(tf.matmul(X, W) + b, name="Y_softmax")
tfdot()


Out[8]:
[tfdot graph: X (?, 784) → MatMul with W → add b → Y (Exp) → Y_softmax (?, 10)]

The loss function is the cross entropy.

Basically it is $-\log(\Pr(Y_{true}))$.
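
With a one-hot label only the true class contributes to the sum, so the loss reduces to the negative log of the probability predicted for that class. A small NumPy sketch with made-up numbers:

p = np.array([0.1, 0.7, 0.2])    # predicted probabilities
y = np.array([0., 1., 0.])       # one-hot true label
loss = -np.sum(y * np.log(p))    # = -log(0.7) ≈ 0.357
print(loss)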


In [9]:
# the true Y
Y_ = tf.placeholder(tf.float32, shape=[None, 10], name="Y_")
# cross entropy between the true Y_ and the computed Y
# (one way: cross_entropy = tf.reduce_mean(-tf.reduce_sum(Y_*tf.log(Y_softmax), axis=1)))
# here we let TensorFlow apply the softmax itself, treating Y as the logits
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=Y_, logits=Y))
tfdot()


Out[9]:
[tfdot graph: the forward network plus the SoftmaxCrossEntropyWithLogits op and the Mean that produces the scalar cross_entropy]

In [10]:
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

tfdot(size=(15,30))


Out[10]:
[tfdot graph: the full training graph, now including the gradients subgraph and the GradientDescent update ops that apply them to W and b]

In [11]:
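# one-hot encode the labels: np.eye(10)[y] maps digit y to a unit row vector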
train_Y = np.eye(10)[train_y]
test_Y = np.eye(10)[test_y]
validation_Y = np.eye(10)[validation_y]

In [12]:
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

In [13]:
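# 1000 steps of minibatch SGD, each on 50 images sampled without replacement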
for i in range(1000):
    rnd_idx = np.random.choice(train_X.shape[0], 50, replace=False)
    train_step.run(feed_dict={X: train_X[rnd_idx], Y_:train_Y[rnd_idx]})

In [14]:
Y.eval(feed_dict={X: train_X[:10]})


Out[14]:
array([[  2.67663455,   0.68965906,   2.86003971,   6.75735569,
          0.67076093,   8.54210854,   2.0426271 ,   2.52033615,
          2.6704216 ,   2.02782583],
       [ 17.41659546,   0.27216622,   2.00843072,   2.00070858,
          0.47167033,   3.96251845,   1.27458858,   1.71361887,
          1.57485271,   1.42768598],
       [  1.18077707,   0.63925374,   1.55139363,   1.98670709,
          3.55372834,   0.79400146,   1.32967412,   1.39427495,
          1.320485  ,   1.84186018],
       [  0.79309028,   9.80140781,   3.76671576,   2.84159708,
          1.10215747,   1.37486482,   1.47885072,   1.88079464,
          3.48577738,   1.39905679],
       [  1.11942303,   1.77244854,   1.37459433,   1.87246704,
          6.09458733,   2.10092139,   1.75259876,   8.16937256,
          4.86684036,  13.76508617],
       [  2.19461823,   0.58457804,   8.22708225,   2.35705256,
          1.17554307,   2.50714231,   1.30046439,   3.0871973 ,
          3.44917345,   4.18956423],
       [  0.36851412,  18.05165863,   2.72908163,   5.87202549,
          0.90705621,   2.28930664,   2.13540125,   1.5804801 ,
          4.15266609,   2.0588882 ],
       [  3.30706835,   0.68421167,   5.18427134,  21.50319481,
          1.42809224,   5.27209568,   1.01299739,   1.73568618,
          6.05581427,   4.26227188],
       [  0.51825446,   8.85924721,   1.63298714,   3.17394423,
          0.83018583,   2.1196959 ,   1.53450501,   1.55919349,
          3.04275322,   1.85751605],
       [  1.53730035,   0.74701214,   1.94078231,   1.3744117 ,
         11.44963646,   2.55936027,   3.61604095,   2.06410933,
          2.95397353,   2.79321051]], dtype=float32)

In [15]:
prediction = tf.argmax(Y, axis=1)

# predictions for the first 10 training images
prediction.eval(feed_dict={X: train_X[:10]})


Out[15]:
array([5, 0, 4, 1, 9, 2, 1, 3, 1, 4], dtype=int64)

In [16]:
# compare with the true labels
showX(train_X[:10])
train_y[:10]


Out[16]:
array([5, 0, 4, 1, 9, 2, 1, 3, 1, 4], dtype=int64)

In [17]:
correct_prediction = tf.equal(tf.argmax(Y,1), tf.argmax(Y_, 1))

correct_prediction.eval({X: train_X[:10] , Y_: train_Y[:10]})


Out[17]:
array([ True,  True,  True,  True,  True,  True,  True,  True,  True,  True], dtype=bool)

In [18]:
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

accuracy.eval(feed_dict={X: train_X[:10] , Y_: train_Y[:10]})


Out[18]:
1.0

In [19]:
accuracy.eval(feed_dict={X: train_X , Y_: train_Y})


Out[19]:
0.90256

In [20]:
# putting it all together
for t in range(10):
    for i in range(1000):
        rnd_idx = np.random.choice(train_X.shape[0], 200, replace=False)
        train_step.run(feed_dict={X: train_X[rnd_idx], Y_:train_Y[rnd_idx]})
    a = accuracy.eval({X: validation_X , Y_: validation_Y})
    print (t, a)


0 0.9243
1 0.9256
2 0.928
3 0.9278
4 0.9281
5 0.9292
6 0.9284
7 0.9302
8 0.9288
9 0.9292

In [21]:
accuracy.eval({X: test_X , Y_: test_Y})


Out[21]:
0.92460001

In [22]:
sess.close()

Multilayer Convolutional Network


In [23]:
# reset the session and graph
tf.reset_default_graph()
# the inputs are the same as before
X = tf.placeholder(tf.float32, shape=[None, 784], name="X")
Y_ = tf.placeholder(tf.float32, shape=[None, 10], name="Y_")

In [24]:
# helpers to create weight and bias variables
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial, name ='W')
def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial, name = 'b')

In [25]:
# helpers for the cnn layers
def conv2d(X, W):
    return tf.nn.conv2d(X, W, strides=[1,1,1,1], padding='SAME')
def max_pool_2x2(X):
    return tf.nn.max_pool(X, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
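
Note the shape bookkeeping: with SAME padding, conv2d keeps the 28x28 spatial size, and each max_pool_2x2 halves it, so the two pooling layers take 28 → 14 → 7. That is where the 7*7*64 input size of the fully-connected layer below comes from.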

In [26]:
# first layer
with tf.name_scope('conv1'):
    ## variables
    W_conv1 = weight_variable([3,3,1,32])
    b_conv1 = bias_variable([32])
    ## build the layer
    X_image = tf.reshape(X, [-1, 28, 28, 1])
    h_conv1 = tf.nn.relu(conv2d(X_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)

tfdot()


Out[26]:
[tfdot graph: the conv1 scope, with W (3, 3, 1, 32) and b (32,), X reshaped to (?, 28, 28, 1), then Conv2D → Relu → MaxPool down to (?, 14, 14, 32)]

In [27]:
# second layer
with tf.name_scope('conv2'):
    ## variables
    W_conv2 = weight_variable([3,3,32,64])
    b_conv2 = bias_variable([64])
    ## build the layer
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)

In [28]:
# fully-connected layer
with tf.name_scope('full'):
    W_fc1 = weight_variable([7*7*64, 1024])
    b_fc1 = bias_variable([1024])
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1)+b_fc1)

In [29]:
# Dropout: A Simple Way to Prevent Neural Networks from Overfitting
# https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf
with tf.name_scope('dropout'):
    keep_prob = tf.placeholder("float", name="keep_prob")
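    # note: tf.nn.dropout also scales the kept activations by 1/keep_prob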
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Readout
with tf.name_scope('readout'):
    W_fc2 = weight_variable([1024,10])
    b_fc2 = bias_variable([10])
    Y = tf.matmul(h_fc1_drop, W_fc2)+b_fc2

In [30]:
cross_entropy =  tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=Y_, logits=Y))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
prediction = tf.argmax(Y, 1, name="prediction")
correct_prediction = tf.equal(prediction, tf.argmax(Y_, 1), name="correction")
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name="accuracy")

In [31]:
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

In [32]:
%%timeit -r 1 -n 1
for i in range(5000):
    rnd_idx = np.random.choice(train_X.shape[0], 50, replace=False)
    if i%250 == 0:
        validation_accuracy = accuracy.eval({
                X: validation_X[:200], Y_: validation_Y[:200], keep_prob: 1.0 })
        print("step %d, validation accuracy %g"%(i, validation_accuracy))
    train_step.run({X: train_X[rnd_idx], Y_: train_Y[rnd_idx], keep_prob: 0.5 })


step 0, validation accuracy 0.105
step 250, validation accuracy 0.93
step 500, validation accuracy 0.96
step 750, validation accuracy 0.955
step 1000, validation accuracy 0.97
step 1250, validation accuracy 0.98
step 1500, validation accuracy 0.985
step 1750, validation accuracy 0.99
step 2000, validation accuracy 0.995
step 2250, validation accuracy 0.985
step 2500, validation accuracy 0.99
step 2750, validation accuracy 0.99
step 3000, validation accuracy 0.99
step 3250, validation accuracy 0.99
step 3500, validation accuracy 0.995
step 3750, validation accuracy 0.995
step 4000, validation accuracy 0.995
step 4250, validation accuracy 0.995
step 4500, validation accuracy 0.99
step 4750, validation accuracy 0.995
1 loop, best of 1: 7min 1s per loop

In [33]:
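# evaluate the test set in 1000-image batches and average; the batches are
# equal-sized, so the mean equals the full-set accuracy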
np.mean([accuracy.eval({X: test_X[i:i+1000],
                        Y_: test_Y[i:i+1000],
                        keep_prob: 1.0})
         for i in range(0, test_X.shape[0], 1000)])


Out[33]:
0.98379993

In [34]:
tf.train.write_graph(sess.graph_def, "./", "mnist_simple.pb", as_text=False)


Out[34]:
'./mnist_simple.pb'
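
The saved GraphDef can later be imported into a fresh graph. A minimal sketch using the TF 1.x API (same file name as above):

import tensorflow as tf
with tf.gfile.GFile("mnist_simple.pb", "rb") as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
with tf.Graph().as_default():
    # name="" keeps the original node names, e.g. "prediction" and "accuracy"
    tf.import_graph_def(graph_def, name="")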